miχpods - TAO
Contents
miχpods - TAO#
eulerian 0-40m control volume
focus on Jq
use model chl / double exponentials
%load_ext watermark
import os
import cf_xarray
import dask
import dcpy
import distributed
import flox.xarray
import hvplot.xarray
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import xarray as xr
import dcpy
import glob
import pump
from pump import mixpods
# NOTE(review): `import dcpy` appears twice above; harmless but redundant.
# Avoid dask splitting stored chunks when slicing (silences the split-chunk warning).
dask.config.set({"array.slicing.split_large_chunks": False})
mpl.rcParams["figure.dpi"] = 140
# Keep attrs through xarray operations so CF metadata survives computation.
xr.set_options(keep_attrs=True)
gcmdir = "/glade/campaign/cgd/oce/people/bachman/TPOS_1_20_20_year/OUTPUT/" # MITgcm output directory
stationdirname = gcmdir
%watermark -iv
/glade/u/home/dcherian/miniconda3/envs/pump/lib/python3.10/site-packages/dask_jobqueue/core.py:20: FutureWarning: tmpfile is deprecated and will be removed in a future release. Please use dask.utils.tmpfile instead.
from distributed.utils import tmpfile
matplotlib : 3.6.3
dask : 2023.1.0
distributed: 2023.1.0
flox : 0.6.7
hvplot : 0.8.2
json : 2.0.9
numpy : 1.23.5
xarray : 2023.1.0
pump : 0.1
cf_xarray : 0.7.7
dcpy : 0.1.dev385+g121534c
sys : 3.10.8 | packaged by conda-forge | (main, Nov 22 2022, 08:26:04) [GCC 10.4.0]
import dask_jobqueue
# Make this cell safe to re-run in the same kernel: tear down any live
# client/cluster from a previous execution before creating new ones.
if "client" in locals():
client.close()
del client
if "cluster" in locals():
cluster.close()
# Request 4-core / 23 GB PBS jobs on Casper; adaptive scaling 2-8 jobs below.
cluster = dask_jobqueue.PBSCluster(
cores=4, # The number of cores you want
memory="23GB", # Amount of memory
processes=1, # How many processes
queue="casper", # The type of queue to utilize (/glade/u/apps/dav/opt/usr/bin/execcasper)
local_directory="/local_scratch/pbs.$PBS_JOBID",
log_directory="/glade/scratch/dcherian/dask/", # Use your local directory
resource_spec="select=1:ncpus=4:mem=23GB", # Specify resources
project="ncgd0011", # Input your project ID here
walltime="02:00:00", # Amount of wall time
interface="ib0", # Interface to use
)
cluster.adapt(minimum_jobs=2, maximum_jobs=8)
# cluster.scale(4)
client = distributed.Client(cluster)
client
Client
Client-3953f65f-a36d-11ed-91ee-3cecef1acbfa
| Connection method: Cluster object | Cluster type: dask_jobqueue.PBSCluster |
| Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/dcherian/proxy/8787/status |
Cluster Info
PBSCluster
e60dbc56
| Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/dcherian/proxy/8787/status | Workers: 0 |
| Total threads: 0 | Total memory: 0 B |
Scheduler Info
Scheduler
Scheduler-c2c360ba-a15d-4c06-940f-fe48464e1bf0
| Comm: tcp://10.12.206.63:40066 | Workers: 0 |
| Dashboard: https://jupyterhub.hpc.ucar.edu/stable/user/dcherian/proxy/8787/status | Total threads: 0 |
| Started: Just now | Total memory: 0 B |
Workers
tao_gridded = mixpods.load_tao()
# Ih: shortwave penetrating below the mixed layer — 0.45 of net shortwave
# decaying exponentially with mixed-layer depth (e-folding scale 1/0.04 = 25 m).
# NOTE(review): presumably a single-band absorption model; confirm the 0.45 and
# 0.04 coefficients against the source publication.
tao_gridded["Ih"] = 0.45 * tao_gridded.swnet * np.exp(-0.04 * np.abs(tao_gridded.mldT))
/glade/u/home/dcherian/miniconda3/envs/pump/lib/python3.10/site-packages/xarray/core/dataset.py:256: UserWarning: The specified Dask chunks separate the stored chunks along dimension "depth" starting at index 42. This could degrade performance. Instead, consider rechunking after loading.
warnings.warn(
/glade/u/home/dcherian/miniconda3/envs/pump/lib/python3.10/site-packages/xarray/core/dataset.py:256: UserWarning: The specified Dask chunks separate the stored chunks along dimension "time" starting at index 199726. This could degrade performance. Instead, consider rechunking after loading.
warnings.warn(
/glade/u/home/dcherian/miniconda3/envs/pump/lib/python3.10/site-packages/xarray/core/dataset.py:256: UserWarning: The specified Dask chunks separate the stored chunks along dimension "longitude" starting at index 2. This could degrade performance. Instead, consider rechunking after loading.
warnings.warn(
Moum et al (2013) Climatologies#
The published .nc file seems “over-cleaned”: values appear to have been masked where ε < 3e-6 or so
# Load TropFlux surface fluxes and interpolate to the 0°, 140°W mooring site.
tropflux = xr.open_mfdataset(
sorted(glob.glob("/glade/work/dcherian/datasets/tropflux/**/*.nc")),
engine="netcdf4",
parallel=True,
)
# Longitudes are on a 0-360°E grid, so 140°W == 220°E.
tropflux_0140 = tropflux.interp(latitude=0, longitude=360 - 140).load()
tropflux_clim = tropflux_0140.groupby("time.month").mean()
Monthly climatology, Vertical mean#
Didn’t work with published nc file#
# Monthly climatologies over 2005-2011: surface net flux, penetrative
# shortwave Ih, and turbulent heat flux Jq averaged over 20-60 m
# (depthchi is negative-down, hence the slice(-60, -20)).
clim = xr.Dataset()
clim["netflux"] = (
tropflux_0140.netflux.sel(time=slice("2005", "2011")).groupby("time.month").mean()
)
clim["Ih"] = tao_gridded.Ih.sel(time=slice("2005", "2011")).groupby("time.month").mean()
clim["Jq"] = (
tao_gridded.Jq.sel(time=slice("2005", "2011"), depthchi=slice(-60, -20))
.groupby("time.month")
.mean(["depthchi", "time"])
)
# Compare -Jq against the net heat input to the layer (netflux minus the
# shortwave that escapes below the mixed layer).
(-1 * clim.Jq).plot(ylim=(0, 200))
(clim.netflux - clim.Ih).plot(ylim=(0, 200))
[<matplotlib.lines.Line2D>]
Try with .mat file#
# Read the published CCHDO chipod product for 0°, 140°W.
cchdo = dcpy.oceans.read_cchdo_chipod_file(
"/glade/u/home/dcherian/datasets/microstructure/osu/chipod/tao/chipods_0_140W.nc"
)
cchdo
<xarray.Dataset>
Dimensions: (depth: 7, time: 107588)
Coordinates:
timeSeries (depth) float64 ...
* time (time) datetime64[ns] 2005-09-23T04:30:00 ... 2017-12-31T23:2...
lat (depth) float64 dask.array<chunksize=(7,), meta=np.ndarray>
lon (depth) float64 dask.array<chunksize=(7,), meta=np.ndarray>
* depth (depth) float64 29.0 39.0 49.0 59.0 69.0 89.0 119.0
Data variables:
T (depth, time) float64 dask.array<chunksize=(7, 10000), meta=np.ndarray>
dTdz (depth, time) float64 dask.array<chunksize=(7, 10000), meta=np.ndarray>
N2 (depth, time) float64 dask.array<chunksize=(7, 10000), meta=np.ndarray>
KT (depth, time) float64 dask.array<chunksize=(7, 10000), meta=np.ndarray>
chi (depth, time) float64 dask.array<chunksize=(7, 10000), meta=np.ndarray>
eps (depth, time) float64 dask.array<chunksize=(7, 10000), meta=np.ndarray>
Jq (depth, time) float64 dask.array<chunksize=(7, 10000), meta=np.ndarray>
Attributes: (12/68)
ncei_template_version: NCEI_NetCDF_TimeSeries_Orthogonal_Templa...
featureType: timeSeries
title: Turbulence quantities measured by chipod...
summary: Turbulence data in upper w m / all good ...
keywords: sea_water_temperature, square_of_brunt_v...
Conventions: CF-1.6, ACDD-1.3
... ...
reference8: Moum, J.N., Ocean speed and turbulence m...
reference9: Warner, S.J., J. Becherer, K. Pujiana, E...
reference10: Moum, J.N., K. Pujiana, R-C. Lien and W....
reference11: Becherer, J. and J.N. Moum, An efficient...
reference12: Moulin, A.J., J.N. Moum and E.L. Shroyer...
reference13: Thakur, R., E.L. Shroyer, R. Govindaraja...def read_chipod_mat_file(fname):
import h5py
import pandas as pd
file = h5py.File(
"/glade/u/home/dcherian/work/pump/datasets/microstructure/chipods_0_140W_hourly.mat",
mode="r",
)
chipod = xr.Dataset()
chipod["depth"] = ("depth", file["chr"]["depth"][:].squeeze())
chipod["time"] = ("time", file["chr"]["time"][:].squeeze())
chipod["time"] = pd.to_datetime(chipod.time.data - 719529, unit="D")
for var in ["Jq", "Kt", "N2", "T", "chi", "dTdz", "eps"]:
chipod[var] = xr.DataArray(file["chr"][var][:, :], dims=("time", "depth"))
return chipod
mat = read_chipod_mat_file(
os.path.expanduser("~/work/pump/datasets/microstructure/chipods_0_140W_hourly.mat")
)
# Annual-mean Jq profiles per year; the |Jq| < 10000 mask drops large outliers.
mat.Jq.where(np.abs(mat.Jq) < 10000).groupby("time.year").mean().sel(
year=slice(2005, 2011)
).hvplot.line(
by="year", y="depth", flip_yaxis=True
) # .opts(legend_cols=1)
Compare#
# Monthly-mean Jq from the CCHDO product (h1) and the .mat file (h2),
# as depth-time quadmeshes with a shared color scale for comparison.
h1 = (
cchdo.Jq.reset_coords(drop=True)
.resample(time="M")
.mean()
.hvplot.quadmesh(
cmap="greys",
flip_yaxis=True,
frame_width=1200,
frame_height=200,
clim=(40, -100),
)
)
h2 = (
mat.Jq.resample(time="M")
.mean()
.hvplot.quadmesh(
cmap="greys",
flip_yaxis=True,
frame_width=1200,
frame_height=200,
clim=(40, -100),
)
)
(h2.opts(title=".mat") + h1.opts(title="CCHDO")).cols(1)
# Daily-mean |Jq| time series per depth, stacked vertically with shared axes.
h = (
(
np.abs(cchdo.Jq.resample(time="D").mean())
.reset_coords(drop=True)
.hvplot.line(by="depth", x="time")
.opts(frame_width=1200)
+ np.abs(mat.Jq.resample(time="D").mean())
.reset_coords(drop=True)
.hvplot.line(by="depth", x="time")
.opts(frame_width=1200)
)
.opts(shared_axes=True)
.cols(1)
)
h
def plot_monthly_clim(ds, **kwargs):
    """Line plot of the monthly climatology of depth-mean |Jq|.

    Uses 2005 through Feb 2011 and the 20-60 m depth range; extra keyword
    arguments are forwarded to ``hvplot.line``.
    """
    subset = ds.Jq.sel(time=slice("2005", "2011-02"), depth=slice(20, 60))
    monthly = subset.mean("depth").groupby("time.month").mean()
    return np.abs(monthly).hvplot.line(**kwargs)
# Overlay monthly |Jq| climatologies under progressively stricter outlier
# filters (on |Jq| and on ε), against the CCHDO product and the surface
# heat input to the layer (Jq0 - Ih).
(
plot_monthly_clim(mat, label=".mat")
* plot_monthly_clim(mat.where(np.abs(mat.Jq) < 20000), label=".mat, Jq < 20000")
* plot_monthly_clim(mat.where(np.abs(mat.Jq) < 10000), label=".mat, Jq < 10000")
* plot_monthly_clim(mat.where(np.abs(mat.Jq) < 3000), label=".mat, Jq < 3000")
* plot_monthly_clim(mat.where(mat.eps < 5e-4), label=".mat, ε < 5e-4")
* plot_monthly_clim(mat.where(mat.eps < 1e-5), label=".mat, ε < 1e-5")
* plot_monthly_clim(mat.where(mat.eps < 3e-6), label=".mat, ε < 3e-6")
* plot_monthly_clim(cchdo, label="CCHDO", color="k")
* (clim.netflux - clim.Ih)
.rename("a")
.hvplot.line(label="Jq0 - Ih", color="k", line_width=4)
).opts(show_grid=True, legend_position="right", frame_width=600, ylim=(0, 160))
Seasonal climatology, Vertical profile#
def plot_seasonal_clim(ds, **kwargs):
    """Seasonal-mean |Jq| depth profiles, one panel per season (DJF/MAM/JJA/SON).

    Uses 2005 through Feb 2011 and the 20-70 m depth range; extra keyword
    arguments are forwarded to ``hvplot.line``.
    """
    jq = ds.Jq.sel(time=slice("2005", "2011-02"), depth=slice(20, 70))
    seasonal = jq.groupby("time.season").mean()
    # Fix panel order; groupby sorts seasons alphabetically otherwise.
    ordered = seasonal.sel(season=["DJF", "MAM", "JJA", "SON"])
    return np.abs(ordered).hvplot.line(
        flip_yaxis=True, y="depth", col="season", **kwargs
    )
# Seasonal |Jq| profiles under different ε filters, overlaid with CCHDO.
h = (
plot_seasonal_clim(mat, label=".mat")
* plot_seasonal_clim(mat.where(mat.eps < 1e-4), label=".mat, ε < 1e-4")
* plot_seasonal_clim(mat.where(mat.eps < 1e-5), label=".mat, ε < 1e-5")
* plot_seasonal_clim(mat.where(mat.eps < 3e-6), label=".mat, ε < 3e-6")
* plot_seasonal_clim(cchdo, label="CCHDO", color="k")
)
h.opts(plot_size=(270, 350), shared_xaxis=True, shared_yaxis=True, show_legend=True)
from cycler import cycler
# Seasonal-mean -Jq profiles from CCHDO (extreme Jq < -1e3 masked out),
# with a fixed black/red/blue/green color order per season.
with plt.rc_context({"axes.prop_cycle": cycler(color=["k", "r", "b", "g"])}):
(
(-1 * cchdo.Jq.where(cchdo.Jq > -1e3).squeeze())
.sel(time=slice("2005", "2012-02-01"))
.groupby("time.season")
.mean()
.sel(season=["DJF", "MAM", "JJA", "SON"])
.cf.plot(y="depth", hue="season")
)
Labeling ENSO phase transitions#
# ONI index time series with the ENSO-phase transition mask on a twin y-axis.
pump.obs.process_oni().sel(time=slice("2005-Oct", "2016-Feb")).reset_coords(
drop=True
).plot.line(aspect=3, size=5, color="k")
pump.obs.make_enso_transition_mask().sel(
time=slice("2005-Oct", "2016-Feb")
).reset_coords(drop=True).plot.line(ax=plt.gca().twinx(), marker="x")
plt.grid(True, which="both", axis="both")
N2 vs N2T#
There is a lot more data available when using N2T
# Compare coverage of N² (full stratification) vs N2T (temperature-based).
tao_gridded.N2.cf.plot()
<matplotlib.collections.QuadMesh>
tao_gridded.N2T.cf.plot()
<matplotlib.collections.QuadMesh>
PDFs change#
# Joint PDF of N² vs S² by ENSO transition phase; axis limits (-4.5, -2.5)
# suggest log10-binned values — confirm against mixpods. The 1:1 line marks
# where N² == S².
fg = tao_gridded.n2s2pdf.sel(
enso_transition_phase=[
"La-Nina cool",
"La-Nina warm",
"El-Nino warm",
"El-Nino cool",
]
).plot(vmax=1, col="enso_transition_phase")
fg.set_titles("{value}")
fg.map(dcpy.plots.line45)
fg.axes[0, 0].set_xlim((-4.5, -2.5))
fg.axes[0, 0].set_ylim((-4.5, -2.5))
(-4.5, -2.5)
# Marginal over N² bins (i.e. the S² distribution) per ENSO phase...
(
tao_gridded.n2s2pdf.sel(
enso_transition_phase=[
"La-Nina cool",
"La-Nina warm",
"El-Nino warm",
"El-Nino cool",
]
)
.sum("N2_bins")
.plot(hue="enso_transition_phase")
)
plt.figure()
# ...and over S² bins (the N² distribution).
(
tao_gridded.n2s2pdf.sel(
enso_transition_phase=[
"La-Nina cool",
"La-Nina warm",
"El-Nino warm",
"El-Nino cool",
]
)
.sum("S2_bins")
.plot(hue="enso_transition_phase")
)
[<matplotlib.lines.Line2D>,
<matplotlib.lines.Line2D>,
<matplotlib.lines.Line2D>,
<matplotlib.lines.Line2D>]
# Same joint-PDF analysis, but using the temperature-based N2T (n2s2Tpdf),
# which has better data coverage than the full N².
fg = tao_gridded.n2s2Tpdf.sel(
enso_transition_phase=[
"La-Nina cool",
"La-Nina warm",
"El-Nino warm",
"El-Nino cool",
]
).plot(vmax=1, col="enso_transition_phase")
fg.set_titles("{value}")
fg.map(dcpy.plots.line45)
fg.axes[0, 0].set_xlim((-4.5, -2.5))
fg.axes[0, 0].set_ylim((-4.5, -2.5))
(-4.5, -2.5)
# Marginal over N² bins (the S² distribution) per ENSO phase...
(
tao_gridded.n2s2Tpdf.sel(
enso_transition_phase=[
"La-Nina cool",
"La-Nina warm",
"El-Nino warm",
"El-Nino cool",
]
)
.sum("N2_bins")
.plot(hue="enso_transition_phase")
)
plt.figure()
# ...and over S² bins (the N² distribution).
(
tao_gridded.n2s2Tpdf.sel(
enso_transition_phase=[
"La-Nina cool",
"La-Nina warm",
"El-Nino warm",
"El-Nino cool",
]
)
.sum("S2_bins")
.plot(hue="enso_transition_phase")
)
[<matplotlib.lines.Line2D>,
<matplotlib.lines.Line2D>,
<matplotlib.lines.Line2D>,
<matplotlib.lines.Line2D>]